********************************************************************************
* 
* Merging CPS data and Outcomes data 
*   
********************************************************************************

* Start from an empty session, then (re)create the LaTeX file that will hold
* the sample-selection table. The body rows are appended further down, one per
* restriction, and the file is closed once the last row has been written.
clear all

capture erase "$paper\SampleSelectionTable.tex"
file open myfile using "$paper\SampleSelectionTable.tex", write replace

* Table preamble: float environment, caption, label, column spec, header row.
file write myfile ///
    "\begin{table}[h!]" _n ///
    "\begin{center}" _n ///
    "\caption{Number of Individuals with CPS Interaction, May 2009 - Dec 2018}" _n ///
    "\label{tab:sampleselection}" _n ///
    "\begin{tabular}{l|c|c|c} \toprule \midrule" _n ///
    "\textbf{Restrictions}& \textbf{Year Observations} & \textbf{Unique Individuals} & \textbf{Referrals}  \\" _n ///
    "\hline" _n



* Build the referral-by-year panel and attach the outcomes data.
* Every CPS record is copied 11 times, one copy per outcome year:
* 2008, 2010, 2012, 2014, 2016 and then each year from 2017 through 2022.
use "$data\Processed\CPSdata", clear
expand 11
bysort refer_id mci_uniq_id9: gen yearcount = _n

* The first copy gets 2008; copies 2-11 are overwritten in list order.
* NOTE(review): this assumes refer_id x mci_uniq_id9 identifies a single row of
* CPSdata - if it does not, copies numbered above 11 keep year 2008. TODO confirm.
gen year = 2008
local copy = 1
foreach yr in 2010 2012 2014 2016 2017 2018 2019 2020 2021 2022 {
    local ++copy
    replace year = `yr' if yearcount == `copy'
}
drop yearcount

* Outcomes are keyed on person-year; rows that exist only in the outcomes
* file (no CPS record) are discarded, unmatched CPS rows are kept.
merge m:1 mci_uniq_id9 year using "$data\Processed\Outcomes_mci"
keep if _merge != 2
tabulate _merge // three-quarters are matched
drop _merge


* Each table row reports three numbers for the data currently in memory:
* person-year observations (_N), distinct individuals, distinct referrals.

* Row 1: before any restriction
distinct mci_uniq_id9
local n_people = r(ndistinct)
distinct refer_id
local n_refs = r(ndistinct)
local n_obs = _N
file write myfile "1. On Referrals & `n_obs' & `n_people' & `n_refs'  \\" _n

* Row 2: the person must be identifiable, otherwise no voter record could
* reasonably be found for them. Matching to voter records relies on names,
* birthdates, gender and, as a last resort, address information.
drop if unknown == 1 // 5761
drop if notinMCImatchingfile == 1 // 2224
drop if year(dob_mdy) < 1900 | dob_mdy == . | mci_id == . // 35978 obs
distinct mci_uniq_id9
local n_people = r(ndistinct)
distinct refer_id
local n_refs = r(ndistinct)
local n_obs = _N
file write myfile "2. Demographics for Voter Matching & `n_obs' & `n_people' & `n_refs'  \\" _n

* Row 3: keep referrals whose recorded state is PA or blank, i.e. drop
* individuals known to live outside Pennsylvania
keep if inlist(state_typ_cde, "", "PA") // 6329 deleted
distinct mci_uniq_id9
local n_people = r(ndistinct)
distinct refer_id
local n_refs = r(ndistinct)
local n_obs = _N
file write myfile "3. Home address in PA & `n_obs' & `n_people' & `n_refs'  \\" _n


* A missing outcome at this point means no voting/registration record was
* found, which is coded as 0.
foreach outcome of varlist rep dem voted registered {
    replace `outcome' = 0 if `outcome' == .
}

* Not every outcome is observed in every year: blank out the year-outcome
* combinations for which no data is available.
replace voted = . if inlist(year, 2017, 2019, 2021, 2022)
replace registered = . if year < 2012 | inlist(year, 2014, 2017, 2019) | year > 2020
foreach party of varlist dem rep {
    replace `party' = . if year < 2012 | year == 2014
}

*Replacing years where too young to vote to missing
* A person is at least 18 on a reference date exactly when they were born on
* or before the same calendar day 18 years earlier, so every test compares
* dob_mdy with mdy(month, day, year-18). Dividing elapsed days by 365 instead
* ignores leap days (18 years is 6574-6575 days, not 6570) and would treat
* people up to five days short of their 18th birthday as eligible.
* The dob_mdy<. guard leaves rows with a missing birth date untouched.

* voted / registered: reference date is the November election day of the year
foreach x in voted registered {
    replace `x'=. if year==2008 & dob_mdy>mdy(11, 4,2008-18) & dob_mdy<.
    replace `x'=. if year==2010 & dob_mdy>mdy(11, 2,2010-18) & dob_mdy<.
    replace `x'=. if year==2012 & dob_mdy>mdy(11, 6,2012-18) & dob_mdy<.
    replace `x'=. if year==2014 & dob_mdy>mdy(11, 4,2014-18) & dob_mdy<.
    replace `x'=. if year==2016 & dob_mdy>mdy(11, 8,2016-18) & dob_mdy<.
    replace `x'=. if year==2018 & dob_mdy>mdy(11, 6,2018-18) & dob_mdy<.
    replace `x'=. if year==2020 & dob_mdy>mdy(11, 3,2020-18) & dob_mdy<.
}

* dem / rep: year-specific reference dates
* NOTE(review): presumably the dates of the party-registration snapshots - confirm
foreach x in dem rep {
    replace `x'=. if year==2012 & dob_mdy>mdy(11,19,2012-18) & dob_mdy<.
    replace `x'=. if year==2016 & dob_mdy>mdy(12,26,2016-18) & dob_mdy<.
    replace `x'=. if year==2017 & dob_mdy>mdy( 2, 6,2017-18) & dob_mdy<.
    replace `x'=. if year==2018 & dob_mdy>mdy(12,24,2018-18) & dob_mdy<.
    replace `x'=. if year==2019 & dob_mdy>mdy( 2,11,2019-18) & dob_mdy<.
    replace `x'=. if year==2020 & dob_mdy>mdy( 9,14,2020-18) & dob_mdy<.
    replace `x'=. if year==2021 & dob_mdy>mdy( 2, 8,2021-18) & dob_mdy<.
    replace `x'=. if year==2022 & dob_mdy>mdy( 1,17,2022-18) & dob_mdy<.
}

* Drop year-individual observations where no outcome at all can be observed,
* given age and/or available voting data
drop if voted==. & registered==. & dem==. & rep==.

* Row 4: require at least one voting year.
* The last election date in the sample is Nov 3 2020, so anyone born after
* Nov 3 2002 is dropped; rows with a missing birth date are not dropped here.
drop if dob_mdy > td(3nov2002) & dob_mdy != .
distinct mci_uniq_id9
local n_people = r(ndistinct)
distinct refer_id
local n_refs = r(ndistinct)
local n_obs = _N
file write myfile "4. Adult to vote at least once & `n_obs' & `n_people' & `n_refs'  \\" _n


* At least one year pre-referral
* evs_y is the outcome year measured relative to the referral's intake year
* (negative = before the referral).
gen evs_y=year-intake_dt_y

* Earliest relative year observed for each referral-person pair. The grouping
* variable is spelled out in full (mci_uniq_id9, the same identifier used for
* the merge and for every count in the table) rather than abbreviated, so the
* command does not depend on variable-abbreviation settings and cannot pick up
* a differently named identifier.
bys refer_id mci_uniq_id9: egen anypre=min(evs_y)

* Drop pairs with no observation before the referral: observations for whom I
* cannot control for pre-existing differences. Pairs whose evs_y is missing
* throughout are dropped as well, because a missing anypre satisfies >=0.
drop if anypre>=0
//342,061 deleted
drop anypre

local indiv =  _N
distinct mci_uniq_id9
local uniq = `r(ndistinct)'
distinct refer_id
local ref = `r(ndistinct)'
file write myfile "5. Political outcomes before referral & `indiv' & `uniq' & `ref'  \\" _n


* Row 6: no expungement of CPS records.
* CPS-type referrals with an intake year before 2017 are removed because of
* partial expungement in CPS records prior to 2017 (19254 obs deleted).
keep if ref_type != "CPS" | intake_dt_y >= 2017
distinct mci_uniq_id9
local n_people = r(ndistinct)
distinct refer_id
local n_refs = r(ndistinct)
local n_obs = _N
file write myfile "6. No CPS record expungement & `n_obs' & `n_people' & `n_refs'  \\" _n


* Row 7: no TPR - remove observations flagged terminated==1
keep if terminated != 1
distinct mci_uniq_id9
local n_people = r(ndistinct)
distinct refer_id
local n_refs = r(ndistinct)
local n_obs = _N
file write myfile "7. No Termination of Parental Rights  & `n_obs' & `n_people' & `n_refs'  \\" _n


* Voter records are mostly trustworthy from 2014 onwards (2014 included), so
* keep a copy of the turnout outcome that is missing for earlier years.
gen voted_post2013 = cond(year >= 2014, voted, .)

* Variable labels
label variable voted_post2013 "Voted data starting 2014"
label variable voted "Voted"
label variable registered "Registered"
label variable dem "Registered as democrat"
label variable rep "Registered as republican"

* Close the LaTeX table and release the file handle
file write myfile ///
    "\bottomrule" _n ///
    "\end{tabular}" _n ///
    "\end{center}" _n ///
    "\end{table}" _n
file close myfile


* Store the analysis dataset
save "$data\Processed\MainDataset", replace


